"""
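Report anchors from the initial and final synteny block files that are missing
from the raw unique anchor file. Each missing anchor is printed once.

By Lei Gao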
usage: Rescue_repetitive_anchor.py [-h] --Initial_block INITIAL_BLOCK
                                   --Final_block FINAL_BLOCK --Raw_Unique
                                   RAW_UNIQUE

optional arguments:
  -h, --help            show this help message and exit
  --Initial_block INITIAL_BLOCK
                        Synteny_block.py output raw synteny blocks
  --Final_block FINAL_BLOCK
                        Call_SV_between_anchors.py cleaned and rescued some
                        anchors
  --Raw_Unique RAW_UNIQUE
                        Raw unique anchors generated by Assemblytics

"""
import argparse
from pathlib import Path
from operator import itemgetter
import re
import os

# Command-line interface: three required input file paths.
# The (flag, help) pairs are registered in this order so the generated usage
# text lists them as Initial_block, Final_block, Raw_Unique.
parser = argparse.ArgumentParser()
for _flag, _help_text in (
    ("--Initial_block", "Synteny_block.py output raw synteny blocks"),
    ("--Final_block", "Call_SV_between_anchors.py cleaned and rescued some anchors"),
    ("--Raw_Unique", "Raw unique anchors generated by Assemblytics"),
):
    parser.add_argument(_flag, type=str, help=_help_text, required=True, default="")

args = parser.parse_args()

# Module-level names used by the processing steps further down the script.
Initial_block, Final_block, Raw_Unique = (
    args.Initial_block,
    args.Final_block,
    args.Raw_Unique,
)



def check_key(cells, raw_keys):
    """Print the anchor described by ``cells`` if it has not been seen yet.

    The anchor is turned into a tab-separated key laid out as
    ``ref_start, ref_end, query_start, query_end, ref_length, query_length,
    ref, query``. The two lengths are looked up in the module-level
    ``Ref_chr_size`` and ``Qry_chr_size`` dicts.

    Args:
        cells: Sequence of string fields for one anchor. Index 0 is the
            reference name, 1-2 the reference start/end, 4 the query name
            and 5-6 the query start/end. Other indices are not read.
        raw_keys: Set of keys already known. A newly seen key is added to it
            in place, so the same anchor is printed only once.

    Raises:
        KeyError: If the reference or query name has no entry in
            ``Ref_chr_size`` / ``Qry_chr_size``.
    """
    ref_start = cells[1]
    ref_end = cells[2]
    query_start = cells[5]
    query_end = cells[6]
    ref = cells[0]
    ref_length = Ref_chr_size[ref]
    query = cells[4]
    query_length = Qry_chr_size[query]
    key = "\t".join([ref_start, ref_end, query_start, query_end, ref_length, query_length, ref, query])
    if key not in raw_keys:
        # Parenthesised single-argument form: valid on Python 3 and prints the
        # same text as the old ``print key`` statement on Python 2.
        print(key)
        raw_keys.add(key)


# Step 0.0 Get used anchors
#
# Every stripped line of Raw_Unique becomes a lookup key in raw_keys.
# Column 4 is stored as the size of the reference named in column 6, and
# column 5 as the size of the query named in column 7 (the same column order
# check_key() uses when it builds its keys).
raw_keys = set()
Ref_chr_size = {}
Qry_chr_size = {}

with open(Raw_Unique) as anchor_file:
    for record in anchor_file:
        fields = record.strip().split("\t")
        # split() already yields strings, so the fields can be joined directly.
        raw_keys.add("\t".join(fields))
        Ref_chr_size[fields[6]] = fields[4]
        Qry_chr_size[fields[7]] = fields[5]

# Step 1.0 check Initial_block
#
# The first line of the file is skipped; for every later line, columns 0-7
# are handed to check_key().
with open(Initial_block) as block_file:
    for row_index, row in enumerate(block_file):
        if row_index == 0:
            continue
        columns = row.strip().split("\t")
        check_key(columns[0:8], raw_keys)


# Step 2.0 check Final_block
#
# The first line of the file is skipped; for every later line, columns 2-9
# are handed to check_key().
with open(Final_block) as block_file:
    for row_index, row in enumerate(block_file):
        if row_index == 0:
            continue
        columns = row.strip().split("\t")
        check_key(columns[2:10], raw_keys)















#srf
